1 Description des données et but

“In the HSerum dataset, a blood sample was collected for 4 different donors. For each sample, 8 sub-samples were measured across 8 days with one sub-sample of each donor per day and permutations according to a latin hypercube sampling method. The total number of available FID signals is then 4*8 = 32. Data were acquired with a 500MHz Bruker Advance spectrometer using a CPMG relaxation-editing sequence with pre-saturation. Spectra are labelled as: Jx-Dy-1D-Tx where x is the day of measurement, y is the donor label, 1D means that the spectra is a one day spectra and Tz is the order of the measure of within the day."

La source de variance principale dans ces données est donc, en principe, le donneur. Le prétraitement devrait donc idéalement donner une matrice de spectres qui maximise la variabilité entre les spectres de différents donneurs (variance “between”) et minimise la variance entre spectres au sein d’un même donneur car ce sont uniquement des répétitions de mesures (variance “within”)

2 Paramètrage du prétraitement et de l’affichage des étapes de prétraitements

3 Lecture des FID

# Lecture des Fid
fidList <- ReadFids(path = data.path, subdirs = subdirs)
Fid_data <- fidList[["Fid_data"]]
Fid_info <- fidList[["Fid_info"]]
# Représentation graphique des FID
if(DrawFid==T) Draw(Re(Fid_data[WhichSpectra,Fid_window]), type.draw = "signal", subtype = "stacked", num.stacked=NumStack,xlab="FID Pur")

# Représentation graphique des FID agrandis
if(DrawFid==T) Draw(Re(Fid_data[WhichSpectra,round(Fid_window/20)]), type.draw = "signal", subtype = "stacked", num.stacked=NumStack,xlab="FID Pur")

4 Prétraitements du FID

4.1 Correction du Group Delay

# FirstOrderPhaseCorrection
Fid_data <- GroupDelayCorrection(Fid_data, Fid_info)
# Représentation graphique des FID
if(DrawFid==T) Draw(Re(Fid_data[WhichSpectra,Fid_window]), type.draw = "signal", subtype = "stacked", num.stacked=NumStack,xlab="Group Delay Correction")

4.2 Solvent Suppression

# FirstOrderPhaseCorrection
Fid_data <- SolventSuppression(Fid_data)
# Représentation graphique des FID
if(DrawFid==T) Draw(Re(Fid_data[WhichSpectra,Fid_window]), type.draw = "signal", subtype = "stacked", num.stacked=NumStack,xlab="Solvent suppression")

4.3 Apodization

# FirstOrderPhaseCorrection
Fid_data <- Apodization(Fid_data,Fid_info)
# Représentation graphique des FID
if(DrawFid==T) Draw(Re(Fid_data[WhichSpectra,Fid_window]), type.draw = "signal", subtype = "stacked", num.stacked=NumStack,xlab="Apodization")

4.4 Transformée de Fourier

# FourierTransform
Spectrum_data <- FourierTransform(Fid_data, Fid_info)
# Représentation graphique des FID
if(DrawSpectra==T) Draw(Re(Spectrum_data[WhichSpectra,Raw_Spec_window]), type.draw = "signal", subtype = "stacked", num.stacked=NumStack,xlab="Fourier Transform")

5 Prétraitements des spectres après la TF

5.1 Correction de phase d’ordre 0

# ZeroOrderPhaseCorrection
Spectrum_data <- ZeroOrderPhaseCorrection(Spectrum_data)

# Représentation graphique des spectres
if(DrawSpectra==T)  Draw(Re(Spectrum_data[WhichSpectra,Raw_Spec_window]), type.draw = "signal", subtype = "stacked", num.stacked=NumStack,xlab="ZeroOrderPhaseCorrection")

5.2 Alignement par rapport au pic de référence (le TMSP)

Spectrum_data <- InternalReferencing(Spectrum_data, Fid_info)
# Représentation graphique des spectres
if(DrawSpectra==T)  Draw(Re(Spectrum_data[WhichSpectra,Raw_Spec_window]), type.draw = "signal", subtype = "stacked", num.stacked=NumStack,xlab="Internal Referencing")

5.3 Baseline correction

Spectrum_data <- BaselineCorrection(Spectrum_data, lambda.bc = 1e+08, p.bc = 0.01)
# Représentation graphique des spectres
if(DrawSpectra==T)  Draw(Re(Spectrum_data[WhichSpectra,Raw_Spec_window]), type.draw = "signal", subtype = "stacked", num.stacked=NumStack,xlab="Internal Referencing")

5.4 Suppression des valeurs négatives

Spectrum_data <- NegativeValuesZeroing(Spectrum_data)
# Représentation graphique des spectres
if(DrawSpectra==T)  Draw(Re(Spectrum_data[WhichSpectra,Raw_Spec_window]), type.draw = "signal", subtype = "stacked", num.stacked=NumStack,xlab="Negative Values Zeroing")

5.5 Alignement - Warping

if(dowarping==TRUE) Spectrum_data <- Warping(Spectrum_data,reference.choice = "before")
# Représentation graphique des spectres
if(DrawSpectra==T)  Draw(Re(Spectrum_data[WhichSpectra,Raw_Spec_window]), type.draw = "signal", subtype = "stacked", num.stacked=NumStack,xlab="Warping")

5.6 Window selection and Bucketing

Spectrum_data <- WindowSelection(Spectrum_data, from.ws = 10, to.ws = 0.2)
# Bucketing avec le Window selection intégré
Spectrum_data <- Bucketing(Spectrum_data, intmeth = "t",mb=mb)
# Représentation graphique des spectres
if(DrawSpectra==T) Draw(Re(Spectrum_data[WhichSpectra,]), type.draw = "signal", subtype = "stacked", num.stacked=NumStack,xlab="Bucketing")

5.7 Suppression des régions non informatives

# Choix de la région à supprimer
Spectrum_data <-  RegionRemoval(Spectrum_data, typeofspectra =typeofspectra)
# Représentation graphique des spectres
if(DrawSpectra==T) Draw(Re(Spectrum_data[WhichSpectra,]), type.draw = "signal", subtype = "stacked", num.stacked=NumStack,xlab="Region Removal")

5.8 Agregation de zones

# Normalization
Spectrum_data <- ZoneAggregation(Spectrum_data, 
fromto.za = list(Citrate =c(2.5, 2.7)))
# Représentation graphique des spectres
if(DrawSpectra==T) Draw(Re(Spectrum_data[WhichSpectra,]), type.draw = "signal", subtype = "stacked", num.stacked=NumStack,xlab="Normalization")

5.9 Normalisation

# Normalization
Spectrum_data <- Normalization(Spectrum_data,type.norm="mean")
# Représentation graphique des spectres
if(DrawSpectra==T) Draw(Re(Spectrum_data[WhichSpectra,]), type.draw = "signal", subtype = "stacked", num.stacked=NumStack,xlab="Normalization")

6 Exportation des résultats

# Sauvegarde des spectres traités en Rdata et fichier Csv
Re_Spectrum_data=Re(Spectrum_data)
if (save == TRUE) {save(Re_Spectrum_data, Fid_info, file=paste0(out.path,dataname,".RData"))
 }
if (export == TRUE) {
utils::write.table(Re_Spectrum_data, file = paste0(out.path,dataname,".csv"),sep=" ;")
utils::write.table(Fid_info, file = paste0(out.path, "/",dataname , "_FidInfo.csv"),sep=" ;")
# ATTENTION le fichier CSV qui sort à une cellule qui manque au dessus à gauche.  Il faut donc, à la main, ouvrir le fichier et reculer les ppms de la première ligne
}

7 Visualisation des spectres finaux

# Visualisation des spectres finaux
# Signals
Draw(Re_Spectrum_data, type.draw = "signal", subtype = "stacked", num.stacked=6)
# PCA scores (and loadings)
Draw(Re_Spectrum_data, type.draw = "pca", type  = "scores", Class = group, axes = c(1:2))
Draw(Re_Spectrum_data, type.draw = "pca", type  = "scores", Class = group, axes = c(3:4))

8 Etude de la répétabilité spectrale

#============ PARAMETRES A MODIFIER
dataset = Re_Spectrum_data
ncompPCA = 4
nClust = 4
nLVPLSDA = 4

8.1 Inertia

# INERTIA

Inertia.res = MBXUCL::Inertia(x = dataset, y = groupN, print = FALSE)
pander(Inertia.res[["Between_within"]])
  BI WI TI
Value 6453 709 7162
Percentage 90.1 9.898 100
pander(Inertia.res[["Per_group"]])
  N Inertia_group Inertia_group100 Inertia_moy_group
Group 1 8 168.8 23.82 21.11
Group 2 8 256.4 36.17 32.05
Group 3 8 90.74 12.8 11.34
Group 4 8 192.9 27.22 24.12
Total 32 709 100 NA

8.2 PCA

# PCA
PCA.res = MBXUCL::SVDforPCA(dataset, ncomp=ncompPCA)

Eigenvalues:

pander(PCA.res[["eigval"]][1:4])
PC1 PC2 PC3 PC4
5066 1386 328.5 93.27
DrawScores(PCA.res, drawNames=TRUE, type.obj = "PCA",
        createWindow=FALSE, main = paste0("PCA score plot for ", dataname),
        color = group, pch = group,axes =c(1,2), )

DrawLoadings(PCA.res,type.obj = "PCA",
        createWindow=FALSE, main = paste0("PCA loadings plot for", dataname),
        axes = c(1:2),  loadingstype="l", num.stacked = 2)
## [[1]]

8.3 Unsupervised clustering

ClustMIC.res = MBXUCL::ClustMIC(Intensities = dataset, nClust = nClust, Trcl = groupN, Dendr = FALSE)
pander(ClustMIC.res)
DunnW DunnKM DBW DBKM RandW RandKM AdjRandW AdjRandKM
0.8246 0.8246 0.6663 0.6663 1 1 1 1

8.4 PLS-DA

PLSDA.res = PLSDA(x = dataset, y = group, nLV = nLVPLSDA, drawRMSEP = TRUE)
perf.plsda = PLSDA.res[4:6]
pander(perf.plsda)
  • RMSEP:

    Y1 Y2 Y3 Y4
    0.09349 0.07655 0.08748 0.1073
  • R2:

    Y1 Y2 Y3 Y4
    0.9764 0.9882 0.9782 0.9663
  • Q2:

    Y1 Y2 Y3 Y4
    0.9534 0.9687 0.9592 0.9386

DrawScores(PLSDA.res, drawNames=TRUE, type.obj = "PLSDA",
        createWindow=FALSE, main = paste0("PLSDA score plot for ", dataname),
        color = group, pch = group,axes =c(1,2))

DrawLoadings(PLSDA.res,  type.obj = "PLSDA",
        createWindow=FALSE, main = paste0("PLSDA loadings plot for", dataname),
        axes = c(1:2),  loadingstype="l", num.stacked = 2)
## [[1]]